import os
import traceback

from doc_split import doc_split
from util.embeddings import docs_to_embedding


def read_docs(directory):
    """Recursively read every file under *directory* and split it into docs.

    Each file is opened as UTF-8 text and its content is passed to
    ``doc_split``. The result is stored under the file's name with a
    trailing ``.txt`` extension removed. A file that cannot be read or split
    is reported on stdout and skipped, so one bad file does not abort the
    whole walk.

    Args:
        directory: Root directory to walk.

    Returns:
        A dict mapping each file name (minus a trailing ``.txt``) to whatever
        ``doc_split`` returned for that file's content.
    """
    result = {}
    for root, _dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                docs = doc_split(content)
            except Exception as e:
                # Deliberately best-effort: report the failure and move on to
                # the next file instead of aborting the batch.
                print(traceback.format_exc())
                print(f"读取文件 {file_path} 时出现错误：{e}")
                continue
            # Strip only a trailing ".txt". str.replace would also remove
            # ".txt" from the middle of a name ("a.txtb.txt" -> "ab").
            key = file[:-len(".txt")] if file.endswith(".txt") else file
            # NOTE: keys are bare file names, so same-named files in different
            # subdirectories overwrite each other (last one visited wins).
            result[key] = docs
    return result


def doc_to_embedding(texts):
    """Placeholder that has no implementation yet.

    NOTE(review): ``doc_proc_batch`` calls the imported ``docs_to_embedding``
    (plural), not this function — confirm whether this stub is still needed.

    Args:
        texts: Currently ignored.

    Returns:
        None.
    """
    return None

def insert_to_vdb():
    """Placeholder that has no implementation yet.

    Returns:
        None.
    """
    return None

def doc_proc_batch(directory="data/docs"):
    """Read the documents under *directory* and compute their embeddings.

    Args:
        directory: Root directory handed to ``read_docs``. Defaults to
            ``"data/docs"``, the path that was previously hard-coded.

    Returns:
        Whatever ``docs_to_embedding`` returns for the mapping produced by
        ``read_docs``. Previously the result was computed and discarded.
    """
    docs = read_docs(directory)
    # NOTE(review): ``docs`` is the dict returned by read_docs — confirm that
    # docs_to_embedding expects a mapping rather than a flat list of texts.
    return docs_to_embedding(docs)
